library(reshape)
library(ggplot2)
`%!in%` = Negate(`%in%`)

# set to desired working directory
# setwd("")

# Start log file for script -----------------------------------------------
TeachingDemos::txtStart("./log files/figureA18_20.txt")

# Load Data ---------------------------------------------------------
# individual data
load("./data/impdat1920.Race.WeekWeight.rdata")
load("./data/impdat20.Race.WeekWeight.rdata")

vars <- c("year", "week", "date_mdy", "ideo_mod", "ideo_con", "age_cat4", "edu_cat4", "race3", "sex",
          "census_region", "broughtwebsite2", "WeekWeights", "D_biep.White_Good_all")
impdat19_sub <- subset(impdat1920.WeekWeight, select = vars)
impdat20_sub <- subset(impdat20.WeekWeight, select = vars)

impdat_comb <- rbind(impdat19_sub, impdat20_sub)

# Race
impdat20.WeekWeight$race_black <- ifelse(impdat20.WeekWeight$race5 == "Black", "Black", "Not Black")
impdat20.WeekWeight$race_black[which(is.na(impdat20.WeekWeight$race5))] <- NA

# Models ------------------------------------------------------------------
# full sample
m1_full <- lm(D_biep.White_Good_all ~ 
                as.factor(week) + ideo_mod + ideo_con +
                age_cat4 + edu_cat4 + race3 + sex +
                census_region + broughtwebsite2,
              data = impdat20.WeekWeight, weights = WeekWeights,
              subset = date_mdy != "2020-05-25")
summary(m1_full)

# Black-White
m1_bw <- lm(D_biep.White_Good_all ~ 
              as.factor(week)*race_black + ideo_mod + ideo_con +
              age_cat4 + edu_cat4 + sex +
              census_region + broughtwebsite2,
            data = impdat20.WeekWeight, weights = WeekWeights,
            subset = date_mdy != "2020-05-25" & (race3 == "White" | race3 == "Black"))
summary(m1_bw)

# Ideo (Whites)
m1_ideo <- lm(D_biep.White_Good_all ~ 
                as.factor(week)*ideo3_lab +
                age_cat4 + edu_cat4 + sex +
                census_region + broughtwebsite2,
              data = impdat20.WeekWeight, weights = WeekWeights,
              subset = date_mdy != "2020-05-25" & race3 == "White")
summary(m1_ideo)


# Prediction Data Sets ----------------------------------------------------
weeks <- names(table(m1_full$model$`as.factor(week)`))

# Full Sample
pdat_full <- data.frame(week = weeks,
                        ideo_mod = 0,
                        ideo_con = 0,
                        race3 = "White",
                        age_cat4 = "18-29",
                        edu_cat4 = "Post-Grad",
                        sex = "f",
                        census_region = "South",
                        broughtwebsite2 = "Assignment school/work")

# Race
pdat_wht <- data.frame(week = weeks,
                       race_white = "White",
                       race_black = "Not Black",
                       ideo_mod = 0,
                       ideo_con = 0,
                       age_cat4 = "18-29",
                       edu_cat4 = "Post-Grad",
                       sex = "f",
                       census_region = "South",
                       broughtwebsite2 = "Assignment school/work")
pdat_blk <- data.frame(week = weeks,
                       race_white = "Not White",
                       race_black = "Black",
                       ideo_mod = 0,
                       ideo_con = 0,
                       age_cat4 = "18-29",
                       edu_cat4 = "Post-Grad",
                       sex = "f",
                       census_region = "South",
                       broughtwebsite2 = "Assignment school/work")

# Ideology (Whites)
pdat_lib <- data.frame(week = weeks,
                       ideo3_lab = "liberal",
                       race3 = "White",
                       age_cat4 = "18-29",
                       edu_cat4 = "Post-Grad",
                       sex = "f",
                       census_region = "South",
                       broughtwebsite2 = "Assignment school/work")

pdat_mod <- data.frame(week = weeks,
                       ideo3_lab = "moderate",
                       race3 = "White",
                       age_cat4 = "18-29",
                       edu_cat4 = "Post-Grad",
                       sex = "f",
                       census_region = "South",
                       broughtwebsite2 = "Assignment school/work")

pdat_con <- data.frame(week = weeks,
                       ideo3_lab = "conservative",
                       race3 = "White",
                       age_cat4 = "18-29",
                       edu_cat4 = "Post-Grad",
                       sex = "f",
                       census_region = "South",
                       broughtwebsite2 = "Assignment school/work")



# Placeboing Differences ----------------------------------------------
# * Full Sample (A18) -----------------------------------------------------------
weeks <- names(table(m1_full$model$`as.factor(week)`))
preds <- predict(m1_full, pdat_full, se.fit = T)

# combine into DF to plot
p_dat <- data.frame(week = weeks,
                    pred = preds$fit,
                    se = preds$se.fit)
p_dat_fill <- data.frame(week = unique(impdat20.WeekWeight$week)[which(unique(impdat20.WeekWeight$week) %!in% weeks)],
                         pred = NA,
                         se = NA)
p_dat <- rbind(p_dat, p_dat_fill)
p_dat$week <- as.Date(paste(2020, p_dat$week, 1, sep="-"), "%Y-%U-%u")
p_dat$week[c(1)] <- "2020-01-01" # from 00 weeks

# Pull eligibile weeks
weeks <- unique(p_dat$week)

# 1 week pre-post
real_difs <- data.frame(week = "May 25",
                        diff = p_dat[which(p_dat$week == "2020-05-25"), "pred"] - p_dat[which(p_dat$week == "2020-05-18"), "pred"])
real_difs <- melt(real_difs, id = c("week"), variable_name = "diff")

# 2 weeks post to week pre
real_difs2 <- data.frame(week = "June 1",
                         diff = p_dat[which(p_dat$week == "2020-06-01"), "pred"] - p_dat[which(p_dat$week == "2020-05-18"), "pred"])
real_difs2 <- melt(real_difs2, id = c("week"), variable_name = "diff")

real_difs_c <- rbind(real_difs, real_difs2)
real_difs_c$week <- factor(real_difs_c$week,
                           levels = c("May 25", "June 1"),
                           labels = c("Week After", "Second Week After"))

# Create empty DF
diffs <- data.frame(week = weeks,
                    change = NA)
# Look over weeks from predictions
# Test is all week-to-week fluctuations apart from the study comparisons
for(i in 2:length(weeks)){
  if(weeks[i] <= "2020-05-18"){
    diffs$change[i] <-  p_dat[which(p_dat$week == weeks[i]), "pred"] - p_dat[which(p_dat$week == weeks[i-1]), "pred"]
  }
  if(weeks[i] == "2020-05-25" | weeks[i] == "2020-06-01"){
    # empty placeholder
  }
  if(weeks[i] > "2020-06-01"){
    diffs$change[i] <-  p_dat[which(p_dat$week == weeks[i]), "pred"] - p_dat[which(p_dat$week == weeks[i-1]), "pred"]
  }
}

# reformat and label to plot
diffs <- melt(diffs, id = c("week"), variable_name = "diff")

# plot
ggplot(diffs, aes(x = value)) +
  geom_histogram(fill = "grey") +
  geom_vline(data = real_difs_c, aes(xintercept = value, linetype = week)) +
  theme_bw() +
  scale_linetype_manual(name = "", values = c(1, 3)) +
  labs(x = "Predicted IAT Score Difference", y = "Count"
       # subtitle = "Observed Difference is Week of May 18 to Week of May 25"
  ) +
  theme(legend.position = "bottom",
        plot.title = element_text(size = 16),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 14),
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 14),
        strip.text = element_text(size = 16),
        strip.background = element_blank())
ggsave("./figures/FigureA18.pdf", height = 6, width = 8)

# summarize
prop.table(table((diffs[, "value"]) < real_difs$value))
prop.table(table((diffs[, "value"]) < real_difs$value))

# * Black - White (A19) -----------------------------------------------------------
weeks <- names(table(m1_bw$model$`as.factor(week)`))
preds_wht <- predict(m1_bw, pdat_wht, se.fit = T)
preds_blk <- predict(m1_bw, pdat_blk, se.fit = T)

# combine into DF to plot
p_dat <- data.frame(week = weeks,
                    race = c(rep("White", length(weeks)),
                             rep("Black", length(weeks))),
                    pred = c(preds_wht$fit, preds_blk$fit),
                    se = c(preds_wht$se.fit, preds_blk$se.fit))
p_dat_fill <- data.frame(week = unique(impdat20.WeekWeight$week)[which(unique(impdat20.WeekWeight$week) %!in% weeks)],
                         race = c(rep("White", length(unique(impdat20.WeekWeight$week)[which(unique(impdat20.WeekWeight$week) %!in% weeks)])),
                                  rep("Black", length(unique(impdat20.WeekWeight$week)[which(unique(impdat20.WeekWeight$week) %!in% weeks)]))),
                         pred = NA,
                         se = NA)
p_dat <- rbind(p_dat, p_dat_fill)
p_dat$race <- factor(p_dat$race,
                     levels = c("Black",
                                "White"))
p_dat$week <- as.Date(paste(2020, p_dat$week, 1, sep="-"), "%Y-%U-%u")
p_dat$week[c(1,length(weeks)+1)] <- "2020-01-01" # from 00 weeks

# Pull eligibile weeks
weeks <- unique(p_dat$week)

# 1 week pre-post
real_difs <- data.frame(week = "May 25",
                        White = p_dat[which(p_dat$week == "2020-05-25" & p_dat$race == "White"), "pred"] - p_dat[which(p_dat$week == "2020-05-18" & p_dat$race == "White"), "pred"],
                        Black = p_dat[which(p_dat$week == "2020-05-25" & p_dat$race == "Black"), "pred"] - p_dat[which(p_dat$week == "2020-05-18" & p_dat$race == "Black"), "pred"])
real_difs <- melt(real_difs, id = c("week"), variable_name = "race")

# 2 weeks post to week pre
real_difs2 <- data.frame(week = "June 1",
                         White = p_dat[which(p_dat$week == "2020-06-01" & p_dat$race == "White"), "pred"] - p_dat[which(p_dat$week == "2020-05-18" & p_dat$race == "White"), "pred"],
                         Black = p_dat[which(p_dat$week == "2020-06-01" & p_dat$race == "Black"), "pred"] - p_dat[which(p_dat$week == "2020-05-18" & p_dat$race == "Black"), "pred"])
real_difs2 <- melt(real_difs2, id = c("week"), variable_name = "race")

real_difs_c <- rbind(real_difs, real_difs2)
real_difs_c$week <- factor(real_difs_c$week,
                           levels = c("May 25", "June 1"),
                           labels = c("Week After", "Second Week After"))

# Create empty DF
diffs <- data.frame(week = weeks,
                    wht_dif = NA,
                    blk_dif = NA)
# Look over weeks from predictions
# Test is all week-to-week fluctations apart from the study comparisons
for(i in 2:length(weeks)){
  if(weeks[i] <= "2020-05-18"){
    diffs$wht_dif[i] <-  p_dat[which(p_dat$week == weeks[i] & p_dat$race == "White"), "pred"] - p_dat[which(p_dat$week == weeks[i-1] & p_dat$race == "White"), "pred"]
    diffs$blk_dif[i] <-  p_dat[which(p_dat$week == weeks[i] & p_dat$race == "Black"), "pred"] - p_dat[which(p_dat$week == weeks[i-1] & p_dat$race == "Black"), "pred"]
  }
  if(weeks[i] == "2020-05-25" | weeks[i] == "2020-06-01"){
    # empty placeholder
  }
  if(weeks[i] > "2020-06-01"){
    diffs$wht_dif[i] <-  p_dat[which(p_dat$week == weeks[i] & p_dat$race == "White"), "pred"] - p_dat[which(p_dat$week == weeks[i-1] & p_dat$race == "White"), "pred"]
    diffs$blk_dif[i] <-  p_dat[which(p_dat$week == weeks[i] & p_dat$race == "Black"), "pred"] - p_dat[which(p_dat$week == weeks[i-1] & p_dat$race == "Black"), "pred"]
  }
}

# reformat and label to plot
diffs <- melt(diffs, id = c("week"), variable_name = "race")
diffs$race <- factor(diffs$race,
                     levels = c("wht_dif", "blk_dif"),
                     labels = c("White", "Black"))

# plot
ggplot(diffs, aes(x = value)) +
  geom_histogram(fill = "grey") +
  geom_vline(data = real_difs_c, aes(xintercept = value, linetype = week)) +
  facet_grid(race~.) +
  theme_bw() +
  scale_linetype_manual(name = "", values = c(1, 3)) +
  labs(x = "Predicted IAT Score Difference", y = "Count"
       # subtitle = "Observed Difference is Week of May 18 to Week of May 25"
  ) +
  theme(legend.position = "bottom",
        plot.title = element_text(size = 16),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 14),
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 14),
        strip.text = element_text(size = 16),
        strip.background = element_blank())
ggsave("./figures/FigureA19.pdf", height = 6, width = 8)

# summarize
prop.table(table((diffs[which(diffs$race == "White"), "value"]) < real_difs$value[which(real_difs$race == "White")]))
prop.table(table((diffs[which(diffs$race == "Black"), "value"]) < real_difs$value[which(real_difs$race == "Black")]))

# * Ideology -- Whites (A20) -------------------------------------------------------
weeks <- names(table(m1_ideo$model$`as.factor(week)`))
preds_lib <- predict(m1_ideo, pdat_lib, se.fit = T)
preds_mod <- predict(m1_ideo, pdat_mod, se.fit = T)
preds_con <- predict(m1_ideo, pdat_con, se.fit = T)

# combine into DF to plot
p_dat <- data.frame(week = weeks,
                    ideo = c(rep("Liberal", length(weeks)),
                             rep("Neutral", length(weeks)),
                             rep("Conservative", length(weeks))),
                    pred = c(preds_lib$fit, preds_mod$fit, preds_con$fit),
                    se = c(preds_lib$se.fit, preds_mod$se.fit, preds_con$se.fit))
p_dat_fill <- data.frame(week = unique(impdat20.WeekWeight$week)[which(unique(impdat20.WeekWeight$week) %!in% weeks)],
                         ideo = c(rep("Liberal", length(unique(impdat20.WeekWeight$week)[which(unique(impdat20.WeekWeight$week) %!in% weeks)])),
                                  rep("Neutral", length(unique(impdat20.WeekWeight$week)[which(unique(impdat20.WeekWeight$week) %!in% weeks)])),
                                  rep("Conservative", length(unique(impdat20.WeekWeight$week)[which(unique(impdat20.WeekWeight$week) %!in% weeks)]))),
                         pred = NA,
                         se = NA)
p_dat <- rbind(p_dat, p_dat_fill)
p_dat$ideo <- factor(p_dat$ideo,
                     levels = c("Liberal",
                                "Neutral",
                                "Conservative"))
p_dat$week <- as.Date(paste(2020, p_dat$week, 1, sep="-"), "%Y-%U-%u")
p_dat$week[c(1,length(weeks)+1, length(weeks)*2+1)] <- "2020-01-01" # from 00 weeks

# Pull eligibile weeks
weeks <- unique(p_dat$week)

# 1 week pre-post
real_difs <- data.frame(week = "May 25",
                        Liberal = p_dat[which(p_dat$week == "2020-05-25" & p_dat$ideo == "Liberal"), "pred"] - p_dat[which(p_dat$week == "2020-05-18" & p_dat$ideo == "Liberal"), "pred"],
                        Neutral = p_dat[which(p_dat$week == "2020-05-25" & p_dat$ideo == "Neutral"), "pred"] - p_dat[which(p_dat$week == "2020-05-18" & p_dat$ideo == "Neutral"), "pred"],
                        Conservative = p_dat[which(p_dat$week == "2020-05-25" & p_dat$ideo == "Conservative"), "pred"] - p_dat[which(p_dat$week == "2020-05-18" & p_dat$ideo == "Conservative"), "pred"])
real_difs <- melt(real_difs, id = c("week"), variable_name = "ideo")

# 2 weeks post to week pre
real_difs2 <- data.frame(week = "June 1",
                         Liberal = p_dat[which(p_dat$week == "2020-06-01" & p_dat$ideo == "Liberal"), "pred"] - p_dat[which(p_dat$week == "2020-05-18" & p_dat$ideo == "Liberal"), "pred"],
                         Neutral = p_dat[which(p_dat$week == "2020-06-01" & p_dat$ideo == "Neutral"), "pred"] - p_dat[which(p_dat$week == "2020-05-18" & p_dat$ideo == "Neutral"), "pred"],
                         Conservative = p_dat[which(p_dat$week == "2020-06-01" & p_dat$ideo == "Conservative"), "pred"] - p_dat[which(p_dat$week == "2020-05-18" & p_dat$ideo == "Conservative"), "pred"])
real_difs2 <- melt(real_difs2, id = c("week"), variable_name = "ideo")

real_difs_c <- rbind(real_difs, real_difs2)
real_difs_c$week <- factor(real_difs_c$week,
                           levels = c("May 25", "June 1"),
                           labels = c("Week After", "Second Week After"))

# Create empty DF
diffs <- data.frame(week = weeks,
                    lib_dif = NA,
                    neut_dif = NA,
                    con_dif = NA)
# Look over weeks from predictions
# Test is all week-to-week fluctations apart from the study comparisons
for(i in 2:length(weeks)){
  if(weeks[i] <= "2020-05-18"){
    diffs$lib_dif[i] <-  p_dat[which(p_dat$week == weeks[i] & p_dat$ideo == "Liberal"), "pred"] - p_dat[which(p_dat$week == weeks[i-1] & p_dat$ideo == "Liberal"), "pred"]
    diffs$neut_dif[i] <-  p_dat[which(p_dat$week == weeks[i] & p_dat$ideo == "Neutral"), "pred"] - p_dat[which(p_dat$week == weeks[i-1] & p_dat$ideo == "Neutral"), "pred"]
    diffs$con_dif[i] <-  p_dat[which(p_dat$week == weeks[i] & p_dat$ideo == "Conservative"), "pred"] - p_dat[which(p_dat$week == weeks[i-1] & p_dat$ideo == "Conservative"), "pred"]
  }
  if(weeks[i] == "2020-05-25" | weeks[i] == "2020-06-01"){
    # empty placeholder
  }
  if(weeks[i] > "2020-06-01"){
    diffs$lib_dif[i] <-  p_dat[which(p_dat$week == weeks[i] & p_dat$ideo == "Liberal"), "pred"] - p_dat[which(p_dat$week == weeks[i-1] & p_dat$ideo == "Liberal"), "pred"]
    diffs$neut_dif[i] <-  p_dat[which(p_dat$week == weeks[i] & p_dat$ideo == "Neutral"), "pred"] - p_dat[which(p_dat$week == weeks[i-1] & p_dat$ideo == "Neutral"), "pred"]
    diffs$con_dif[i] <-  p_dat[which(p_dat$week == weeks[i] & p_dat$ideo == "Conservative"), "pred"] - p_dat[which(p_dat$week == weeks[i-1] & p_dat$ideo == "Conservative"), "pred"]
  }
}

# reformat and label to plot
diffs <- melt(diffs, id = c("week"), variable_name = "ideo")
diffs$ideo <- factor(diffs$ideo,
                     levels = c("lib_dif", "neut_dif", "con_dif"),
                     labels = c("Liberal", "Neutral", "Conservative"))

# plot
ggplot(diffs, aes(x = value)) +
  geom_histogram(fill = "grey") +
  geom_vline(data = real_difs_c, aes(xintercept = value, linetype = week)) +
  facet_grid(ideo~.) +
  theme_bw() +
  scale_linetype_manual(name = "", values = c(1, 3)) +
  labs(x = "Predicted IAT Score Difference", y = "Count"
       # subtitle = "Observed Difference is Week of May 18 to Week of May 25"
  ) +
  theme(legend.position = "bottom",
        plot.title = element_text(size = 16),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 14),
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 14),
        strip.text = element_text(size = 16),
        strip.background = element_blank())
ggsave("./figures/FigureA20.pdf", height = 6, width = 8)


# summarize
prop.table(table((diffs[which(diffs$ideo == "Liberal"), "value"]) < real_difs$value[which(real_difs$ideo == "Liberal")]))
prop.table(table((diffs[which(diffs$ideo == "Neutral"), "value"]) < real_difs$value[which(real_difs$ideo == "Neutral")]))
prop.table(table((diffs[which(diffs$ideo == "Conservative"), "value"]) < real_difs$value[which(real_difs$ideo == "Conservative")]))


prop.table(table((diffs[which(diffs$ideo == "Liberal"), "value"]) < real_difs$value[which(real_difs$ideo == "Liberal")]))
prop.table(table((diffs[which(diffs$ideo == "Neutral"), "value"]) < real_difs$value[which(real_difs$ideo == "Neutral")]))
prop.table(table((diffs[which(diffs$ideo == "Conservative"), "value"]) < real_difs$value[which(real_difs$ideo == "Conservative")]))


# End log file for script -------------------------------------------------
TeachingDemos::txtStop()